# Load a MALLET hLDA topic-state dump and print a few exploratory summaries.
# All file names below are relative to the MALLET directory.
setwd("mallet-2.0.7")

# Only one state file is needed. Previously the gzipped dump was parsed and the
# result was immediately overwritten by the second read, which wasted a full
# parse and made the script fail when that file was absent. Swap the two lines
# to analyse the other dump instead.
# hlda <- read.table(file = "topic-state.gz", sep = "")
hlda <- read.table(file = "topic-state4lev.txt", sep = "")
head(hlda)

# col 1 to 3 is the path (backwards), then token index, word, level
# NOTE(review): the column notes in this section describe a 6-column layout
# (3 path columns). The file loaded above is named "4lev" and columns 1:4 are
# treated as the path further down, so for that file every column after the
# path is presumably shifted right by one -- confirm against head(hlda).
summary(hlda$V1)
table(hlda$V1)   # Bottom-Level 19 values
summary(hlda$V2)
table(hlda$V2)   # Med-Level 6 values
summary(hlda$V3) # Top-Level
summary(hlda$V4) # token index
summary(hlda$V6) # level [3] -> 0 is the top level
table(hlda$V6)

# Get paths and frequencies: the distinct combinations of the first 4 columns.
paths <- unique(hlda[, 1:4])
paths$V1[paths$V2 == 46]

# Print the V1 values found under each selected V2 value.
# NOTE(review): 12 is listed twice, so its values are printed twice -- confirm
# whether the second 12 was meant to be a different id.
for (topic_id in c(14, 12, 2, 7, 32, 12)) {
  print(paths$V1[paths$V2 == topic_id])
}

## Per-level topic labels
# For every level of the topic tree, append the most frequent words of each
# topic at that level to `out_file`. Lines are appended, so re-running the
# script adds a second copy of the report to an existing file.

out_file <- "HLDA3.txt"

# Number of levels in the topic tree. `levels` is never assigned in this
# script, so without this guard the name resolves to base::levels() (a
# function) and `levels + 3` fails. A numeric `levels` that already exists in
# the session is respected.
# Assumes the layout noted at the top of the script: <levels> path columns
# (deepest level first), then token index, word, level -- TODO confirm.
if (!exists("levels", mode = "numeric")) {
  levels <- ncol(hlda) - 3L
}
word_col <- levels + 2L
level_col <- levels + 3L
stopifnot(levels >= 1, level_col <= ncol(hlda))

# Append one line to `path`; the pieces in `...` are concatenated with paste0().
append_line <- function(path, ...) {
  cat(paste0(...), "\n", file = path, sep = "", append = TRUE)
}

# Append two space-separated lines for one group of tokens: the `n_top` most
# frequent words, then the `n_top` most frequent words longer than `min_chars`
# characters. Only the `pool` most frequent distinct words are considered.
# head() is used instead of fixed-length indexing so that short word lists are
# not padded with "NA" entries.
append_topic_words <- function(path, words, n_top = 20L, min_chars = 3L,
                               pool = 2000L) {
  # as.character() keeps unused factor levels out of the frequency table.
  counts <- sort(table(as.character(words)), decreasing = TRUE)
  ranked <- head(names(counts), pool)
  long_words <- ranked[nchar(ranked) > min_chars]
  append_line(path, paste(head(ranked, n_top), collapse = " "))
  append_line(path, paste(head(long_words, n_top), collapse = " "))
  invisible(ranked)
}

## LEVEL 0: all level-0 tokens are reported together as one topic.
append_line(out_file, "Topic 0: Level 0")
lev0 <- hlda[which(hlda[, level_col] == 0), ]
append_topic_words(out_file, lev0[, word_col])

## LEVELS 1 and deeper: one section per level, one entry per topic.
# The path is stored deepest level first, so the topic id for level `depth`
# sits in column `levels - depth`. Only levels that have such a column are
# visited; a level without one would select column 0.
for (depth in seq_len(levels - 1L)) {
  topic_col <- levels - depth
  level_rows <- na.omit(hlda[which(hlda[, level_col] == depth), ])
  append_line(out_file, "Level ", depth)

  for (topic in unique(level_rows[, topic_col])) {
    in_topic <- level_rows[, topic_col] == topic
    append_line(out_file, "Level ", depth, ": Topic ", topic)
    append_line(out_file, "Token Count: ", sum(in_topic))
    append_topic_words(out_file, level_rows[in_topic, word_col])
  }
}
